library(ggplot2)
library(ggpubr)
## Loading required package: magrittr
library(Seurat)
folder="~/corona_project/plots_intigrate/"
### initilization 
cell_type_genes=list("Bowman’s glands"=c("SOX9", "SOX10", "GPX3"),
                     "olfactory HBCs"=c("TP63", "KRT5", "CXCL14", "SOX2", "MEG3"),
                     "olfactory ensheathing glia"=c("S100B", "PLP1", "PMP2","MPZ", "ALX3"),
                     "olfactory microvillar cells"=c("ASCL3", "CFTR", "HEPACAM2", "FOXL1"),
                     "immature neurons"=c("GNG8", "OLIG2", "EBF2", "LHX2", "CBX8"),
                     "mature neurons"=c("GNG13", "EBF2", "CBX8", "RTP1"),
                     "GBCs"=c("HES6", "ASCL1", "CXCR4", "SOX2", "EZH2", "NEUROD1", "NEUROG1"),
                     "sustentacular cells"=c("CYP2A13", "CYP2J2", "GPX6", "ERMN", "SOX2"))
marker_genes=c("SOX9", "SOX10", "GPX3",
               "TP63", "KRT5", "CXCL14", "SOX2", "MEG3",
               "S100B", "PLP1", "PMP2","MPZ", "ALX3",
               "ASCL3", "CFTR", "HEPACAM2", "FOXL1",
               "GNG8", "OLIG2", "EBF2", "LHX2", "CBX8",
               "GNG13", "EBF2", "CBX8", "RTP1",
               "HES6", "ASCL1", "CXCR4", "SOX2", "EZH2", "NEUROD1", "NEUROG1",
               "CYP2A13", "CYP2J2", "GPX6", "ERMN", "SOX2")
cell_type_names=c("Bowman’s glands",
                  "olfactory HBCs",
                  "olfactory ensheathing glia",
                  "olfactory microvillar cells",
                  "immature neurons",
                  "mature neurons",
                  "GBCs",
                  "sustentacular cells")
gene_path=c("LNPEP", "ACE", "CD143","PRCP","CTSG",
            "PREP", "KLK1_2","CMA1","REN", "MME",
            "THOP1","NLN","AGTR1","AGTR2","MAS1",
            " MRGPRDCPA3","ACE2","AGT","ANPEP","ENPEP","CTSA","ATP6AP2")
###intigration and batch effect removing using Seurat
batch_list=list("P2","P3")
batch_data_list=list("P2"=1,"P3"=1)
for( i in 1:length(batch_list))
{
  print(batch_list[[i]])
  s_object=Read10X(paste("~/corona_project/Input_files/",batch_list[[i]],sep=""))
  s_object=CreateSeuratObject(counts =s_object, min.cells = 0, min.features = 400, project = "P23")
  s_object[["percent.mt"]] <- PercentageFeatureSet(s_object, pattern = "^MT-")
  s_object <- subset(s_object, subset = nFeature_RNA >100 & nFeature_RNA <8000 & percent.mt <10)
  s_object@meta.data[, "run"] <- batch_list[i]
  s_object=NormalizeData(s_object)
  batch_data_list[[i]]=FindVariableFeatures(s_object, selection.method = "vst", nfeatures =5000)
}
## [1] "P2"
## [1] "P3"
batch_data_list
## $P2
## An object of class Seurat 
## 33538 features across 10881 samples within 1 assay 
## Active assay: RNA (33538 features)
## 
## $P3
## An object of class Seurat 
## 33538 features across 5415 samples within 1 assay 
## Active assay: RNA (33538 features)
saveRDS(batch_data_list,"~/corona_project/batch_data_list_5000_23.rds")
set.seed(1)

###intigration and batch effect removing using Seurat
run.anchors <- FindIntegrationAnchors(object.list = batch_data_list, dims = 1:30,anchor.features = 5000)
## Computing 5000 integration features
## Scaling features for provided objects
## Finding all pairwise anchors
## Running CCA
## Merging objects
## Finding neighborhoods
## Finding anchors
##  Found 11489 anchors
## Filtering anchors
##  Retained 8787 anchors
## Extracting within-dataset neighbors
a=run.anchors
run.combined <- IntegrateData(anchorset = run.anchors, dims = 1:30)
## Merging dataset 2 into 1
## Extracting anchors for merged samples
## Finding integration vectors
## Finding integration vector weights
## Integrating data
## Warning: Adding a command log without an assay associated with it
b=run.combined
sce_3_1_1_before=run.combined
saveRDS(sce_3_1_1_before,"~/corona_project/sce_3_1_1_before_5000_23.rds")
### Dim reduction and clustering
DefaultAssay(run.combined) <- "integrated"
run.combined <- ScaleData(run.combined, verbose = FALSE)
run.combined <- RunPCA(run.combined, npcs = 30, verbose = FALSE)
run.combined <- RunUMAP(run.combined, reduction = "pca", dims = 1:30)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 10:37:57 UMAP embedding parameters a = 0.9922 b = 1.112
## 10:37:57 Read 16296 rows and found 30 numeric columns
## 10:37:57 Using Annoy for neighbor search, n_neighbors = 30
## 10:37:57 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 10:37:59 Writing NN index file to temp file /tmp/RtmpUXkOCb/file539e1caf9252
## 10:37:59 Searching Annoy index using 1 thread, search_k = 3000
## 10:38:03 Annoy recall = 100%
## 10:38:03 Commencing smooth kNN distance calibration using 1 thread
## 10:38:04 Initializing from normalized Laplacian + noise
## 10:38:05 Commencing optimization for 200 epochs, with 707164 positive edges
## 10:38:16 Optimization finished
run.combined <- FindNeighbors(run.combined, reduction = "pca", dims = 1:30)
## Computing nearest neighbor graph
## Computing SNN
run.combined <- FindClusters(run.combined, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 16296
## Number of edges: 654023
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9526
## Number of communities: 22
## Elapsed time: 2 seconds
### cell-types annotation
for(i in 0:as.integer(length(marker_genes)/4))
{
  a=(i*4+1)
  b=((i+1)*4)
  if(i==as.integer(length(marker_genes)/4))
    b=length(marker_genes)
  if(a>length(marker_genes))
     next
  print(i)
  print(VlnPlot(run.combined, features = marker_genes[a:b],ncol=2))
  print(FeaturePlot(run.combined, features = marker_genes[a:b],cols=c("lightgrey", "red"),label = TRUE,pt.size=2,order = TRUE))
}
## [1] 0
## Warning: Using `as.character()` on a quosure is deprecated as of rlang 0.3.0.
## Please use `as_label()` or `as_name()` instead.
## This warning is displayed once per session.

## [1] 1

## [1] 2

## [1] 3

## [1] 4
## Warning: Could not find FOXL1 in the default search locations, found in RNA
## assay instead

## Warning: Could not find FOXL1 in the default search locations, found in RNA
## assay instead

## [1] 5
## Warning: Could not find CBX8 in the default search locations, found in RNA assay
## instead

## Warning: Could not find CBX8 in the default search locations, found in RNA assay
## instead

## [1] 6
## Warning: Could not find CBX8 in the default search locations, found in RNA assay
## instead

## Warning: Could not find CBX8 in the default search locations, found in RNA assay
## instead

## [1] 7

## [1] 8

## [1] 9

run.combined<-RenameIdents(run.combined,`13`="Immature neurons", `14`="Mature neurons", `3`="Olfactory HBCs",`19`="Olfactory microvillar cells",
                   `5`="Bowman's gland", `16`="Olfactory ensheathing glia", `9`="Sustentacular cells",`20`="GBCs" )
run.combined=subset(run.combined, idents = c("Immature neurons","Mature neurons","Olfactory HBCs","Olfactory microvillar cells","Bowman's gland",
                          "Olfactory ensheathing glia","Sustentacular cells","GBCs"))
sce_3_1_1_after=run.combined
saveRDS(sce_3_1_1_after,"~/corona_project/sce_3_1_1_after_5000_23.rds")
### Dim plot to show clusters clearly for each cell type
DimPlot(run.combined, reduction = "umap")

### FeaturePlot to show the expression of ACE2 in each cell type
FeaturePlot(run.combined, features = "ACE2",combine = FALSE,cols=c("lightgrey", "red"),pt.size=2,order =TRUE)
## Warning: Could not find ACE2 in the default search locations, found in RNA assay
## instead
## [[1]]

### Percentage of ACE2 expressed's cells in each cell type
a=table(Seurat::Idents(run.combined)[colnames(run.combined[,as.matrix(run.combined@assays$RNA["ACE2",])>0])])
print(sum(a))
## [1] 32
a=a*100/sum(a)
df=data.frame(cell_type=names(a),count_percentage=a)
ggplot(data=df, aes(x=cell_type, y=count_percentage.Freq, fill=cell_type)) + 
  geom_bar(stat="identity") +  theme_classic()+ theme(axis.text.x = element_text(angle = 90, hjust = 1),legend.position="none")

dim(run.combined@assays$RNA)
## [1] 33538  3906
DefaultAssay(run.combined) <- "RNA"
### Finding differential expressed genes (HBC+;ACE2+) vs B (HBC+;ACE2-)
HBC=subset(run.combined, idents = "Olfactory HBCs")
pos=c(rep(1,length(colnames(HBC[,as.matrix(HBC@assays$RNA["ACE2",])>0]))))
names(pos)=colnames(HBC[,as.matrix(HBC@assays$RNA["ACE2",])>0])
neg=c(rep(-1,length(colnames(HBC[,as.matrix(HBC@assays$RNA["ACE2",])<=0]))))
names(neg)=colnames(HBC[,as.matrix(HBC@assays$RNA["ACE2",])<=0])
pos_neg=c(pos,neg)
sum(pos_neg[which(names(pos_neg)%in%rownames(HBC@meta.data))]==pos_neg)
## [1] 1087
HBC=AddMetaData(
  object = HBC,
  metadata = pos_neg,
  col.name = 'HBC'
)
Idents(HBC)=pos_neg
sum(pos_neg[which(names(pos_neg)%in%names(Idents(HBC)))]==pos_neg)
## [1] 1087
HBC_genes=FindMarkers(HBC, ident.1=1,ident.2=-1,test.use = "wilcox")
HBC_genes_a=rownames(HBC_genes)[HBC_genes[2]>1 & HBC_genes[1]<0.05]
HBC_genes_b=rownames(HBC_genes)[HBC_genes[2]<-1 & HBC_genes[1]<0.05]
HBC_genes_names=c(HBC_genes_a,HBC_genes_b)

### Finding differential expressed genes (Sustentacular+;ACE2+) vs B (Sustentacular+;ACE2-)
Sustentacular=subset(run.combined, idents = "Sustentacular cells")
pos=c(rep(1,length(colnames(Sustentacular[,as.matrix(Sustentacular@assays$RNA["ACE2",])>0]))))
names(pos)=colnames(Sustentacular[,as.matrix(Sustentacular@assays$RNA["ACE2",])>0])
neg=c(rep(-1,length(colnames(Sustentacular[,as.matrix(Sustentacular@assays$RNA["ACE2",])<=0]))))
names(neg)=colnames(Sustentacular[,as.matrix(Sustentacular@assays$RNA["ACE2",])<=0])
pos_neg=c(pos,neg)
sum(pos_neg[which(names(pos_neg)%in%rownames(Sustentacular@meta.data))]==pos_neg)
## [1] 737
Sustentacular=AddMetaData(
  object = Sustentacular,
  metadata = pos_neg,
  col.name = 'Sustentacular'
)
Idents(Sustentacular)=pos_neg
sum(pos_neg[which(names(pos_neg)%in%names(Idents(Sustentacular)))]==pos_neg)
## [1] 737
Sustentacular_genes=FindMarkers(Sustentacular, ident.1=1,ident.2=-1,test.use = "wilcox")
Sustentacular_genes_a=rownames(Sustentacular_genes)[Sustentacular_genes[2]>1 & Sustentacular_genes[1]<0.05]
Sustentacular_genes_b=rownames(Sustentacular_genes)[Sustentacular_genes[2]<-1 & Sustentacular_genes[1]<0.05]
Sustentacular_genes_names=c(Sustentacular_genes_a,Sustentacular_genes_b)

### Saving DE genes
write.csv(HBC_genes_a,paste(folder,"HBC_genes_a.csv",sep=""))
write.csv(HBC_genes_b,paste(folder,"HBC_genes_b.csv",sep=""))
write.csv(Sustentacular_genes_a,paste(folder,"Sustentacular_genes_a.csv",sep=""))
write.csv(Sustentacular_genes_b,paste(folder,"Sustentacular_genes_b.csv",sep=""))